* Root locations. Every path used later in this do-file is built from
* these two globals.
global projectdir "~"
global datadir "$projectdir/data"

* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- below stops with a "log file already open" error.
capture log close

* The log is written with a .log extension. Writing it as ".do" together
* with -replace- would mislabel the log as a script and could overwrite a
* do-file of the same name stored in the logs folder.
log using "$projectdir/logs/construct_placebo_analysis_file.log", text replace


****************************************************************
****************************************************************

* Build the list of 6-digit NAICS codes of the high-tech startups that
* were founded by treated UMETRICS PIKs whose CFDA was shocked in a
* given year. Two input files (the "w2" file and the "lehd" file) are
* filtered the same way, reduced to (year_shock, naics) pairs, and stacked.

* --- First input: the w2 file ---
cd $datadir/intermediate_files
* Load only rows that have a shock year and are flagged ht_mode_2012 == 1.
use if !missing(year_shock) & ht_mode_2012 == 1 using naics_ht_start_w2_cfdashocks_2018q4, clear
rename naics2012 naics
keep year_shock naics
duplicates drop
tempfile w2_pairs
save `w2_pairs', replace

* --- Second input: the lehd file ---
cd $datadir/intermediate_files
* Same filter, using this file's own flag and NAICS variable names.
use if !missing(year_shock) & ht_seinunit_mode_2012 == 1 using naics_ht_start_lehd_cfdashocks_2018q4, clear
rename naics2012fnl naics
keep year_shock naics
duplicates drop

* Stack both sources and remove pairs that appear in both.
append using `w2_pairs'
duplicates drop

* Result: one row per treated NAICS code and year in which it was treated.
* By definition, all treated NAICS are high-tech NAICS.
cd $datadir/hightech_entre_placebo
save naics_shock_year, replace
 
****************************************************************
****************************************************************






****************************************************************
****************************************************************

* Build a firm-year (lbdfid-year) file holding firm age, dominant NAICS,
* establishment count, payroll, employment, and a high-tech flag, and
* save it as lbdfid_year.

* Start with firm age at lbdfid level
cd /projects/data_external/lbd/data
use lbdfirm_slim, clear
keep lbdfid year firmage_pay firmage_emp
mdesc
* Use the employment-based age; fall back to the payroll-based age only
* where the employment-based one is missing.
gen firmage = firmage_emp
replace firmage = firmage_pay if missing(firmage)
mdesc
keep lbdfid year firmage

* Add dominant NAICS codes for lbdfid
* No rows are dropped here: master-only, using-only and matched
* firm-years are all retained.
merge 1:1 lbdfid year using dominant_naics_lbdfid
drop _merge
keep lbdfid year firmage bds_vcnaics_dom
mdesc

* Add employment/pay/estabs (from estab-level file)
* Establishment rows with no firm-year in memory (_merge == 2) are
* discarded; firm-years with no establishment rows are kept.
merge 1:m lbdfid year using lbd_slim
drop if _merge == 2
drop _merge

keep lbdfid lbdnum year firmage pay bds_emp bds_vcnaics_dom
mdesc

* Count estabs and sum pay/emp by lbdfid-year
* NOTE(review): firm-years that had no match in lbd_slim still carry one
* row here, so they are counted as one establishment (with pay and
* bds_emp summing to 0). Confirm this is intended.
gen lbdnums = 1
collapse (sum) lbdnums pay bds_emp, by(lbdfid year firmage bds_vcnaics_dom) fast
mdesc

*duplicates report lbdfid year
*duplicates tag lbdfid firmage, gen(dup)
*tab dup
*gsort -dup lbdfid firmage

* First four digits of a code made of exactly six digits; naics_4 is left
* empty for anything else. The variable is referenced by its full name
* (not the abbreviation bds_vcnaics) so the line does not depend on
* variable abbreviation being enabled or unambiguous.
gen naics_4 = regexs(1) if regexm(bds_vcnaics_dom, "^([0-9][0-9][0-9][0-9])[0-9][0-9]$")
* External do-file; presumably creates the hightech flag tabulated below
* from naics_4 -- confirm against that file.
do "/projects/old_9802_v2/c9802/ht_entre_placebo/flag_hightech_4digit_naics.do"
tab hightech

compress
cd $datadir/hightech_entre_placebo
save lbdfid_year, replace

****************************************************************
****************************************************************


****************************************************************
****************************************************************

* Save the subset of the firm-year file with hightech == 1 as its own
* dataset. The filter is applied while loading.
cd $datadir/hightech_entre_placebo
use if hightech == 1 using lbdfid_year, clear
compress
save lbdfid_year_hightech, replace

****************************************************************
****************************************************************



****************************************************************
****************************************************************

* Aggregate the firm-year file to one row per (year, dominant NAICS):
* number of firms, number of age-0 firms, establishments, pay, employment.
cd $datadir/hightech_entre_placebo
use lbdfid_year, clear

* Startup indicator: 1 when firmage is exactly 0, otherwise 0
* (a missing firmage therefore counts as a non-startup).
gen lbdfids_start = cond(firmage == 0, 1, 0)
tab lbdfids_start

* Each row is one firm-year, so summing a constant 1 counts firms.
gen lbdfids = 1
collapse (sum) lbdfids_start lbdfids lbdnums pay bds_emp, by(year bds_vcnaics_dom) fast

compress
save industry_dom_naics, replace

* Reload the saved file and check it: report duplicates on the
* NAICS-year key and list the years present.
use industry_dom_naics, clear
duplicates report bds_vcnaics_dom year
tab year

****************************************************************
****************************************************************


****************************************************************
****************************************************************

* Expand the industry file so that every dominant NAICS code has a row
* for each year 2000-2018, filling the counts with zeros where the
* code had no row in a given year.

* Distinct list of NAICS codes observed in the industry file.
cd $datadir/hightech_entre_placebo
use bds_vcnaics_dom using industry_dom_naics, clear
duplicates drop
tempfile naics_codes
save `naics_codes', replace

* Skeleton: 19 consecutive years (2000, ..., 2018) crossed with every code.
clear
local n_years 19
set obs `n_years'
gen year = 1999 + _n
cross using `naics_codes'

* Attach the observed values. Skeleton cells without data (_merge == 1)
* get zeros. Rows found only in the industry file (_merge == 2) are
* kept unchanged.
merge 1:1 bds_vcnaics_dom year using industry_dom_naics
foreach v of varlist lbdfids_start lbdfids lbdnums pay bds_emp {
    replace `v' = 0 if _merge == 1
}
drop _merge

sort bds_vcnaics_dom year
order bds_vcnaics_dom year

compress
save industry_dom_naics_balanced, replace

****************************************************************
****************************************************************



****************************************************************
****************************************************************

* Add shocks
* Attach each NAICS code's first shock year to the NAICS-year panel,
* build treatment/event-time variables and transformed outcomes, and
* save the result as industry_analysis_file.

cd $datadir/hightech_entre_placebo
use naics_shock_year, clear
* Identify first shock experienced by each NAICS
collapse (min) year_shock, by(naics) fast
tab year_shock

rename naics bds_vcnaics_dom
cd $datadir/hightech_entre_placebo
merge 1:m bds_vcnaics_dom using industry_dom_naics_balanced
* treated = 1 only for panel rows whose NAICS code appears in the shock
* list. Shock-list codes with no panel rows (_merge == 1) stay in the
* data with treated = 0 and a missing year.
gen treated = (_merge == 3)
drop _merge

* Years relative to the first shock; missing for codes without a shock.
gen event_year = year - year_shock

* Stata treats a missing value as larger than any number, so without the
* !missing(year) guard a row with a missing year would be flagged as
* post-shock. Such rows now get a missing post_shock instead.
gen post_shock = (year >= year_shock) if !missing(year)
replace post_shock = 0 if missing(year_shock) & !missing(year)

* Numeric identifier for each NAICS code.
egen naics_n = group(bds_vcnaics_dom)

* First four digits of a code made of exactly six digits. The variable is
* referenced by its full name (not the abbreviation bds_vcnaics) so the
* line does not depend on variable abbreviation being enabled.
gen naics_4 = regexs(1) if regexm(bds_vcnaics_dom, "^([0-9][0-9][0-9][0-9])[0-9][0-9]$")
* External do-file; presumably creates the hightech flag tabulated below
* from naics_4 -- confirm against that file.
do "/projects/old_9802_v2/c9802/ht_entre_placebo/flag_hightech_4digit_naics.do"
tab hightech

* Firms that are not age-0 startups.
gen nonstart = lbdfids - lbdfids_start

* Outcome transformations. ln() is missing when the count is 0; the
* inverse hyperbolic sine versions are defined at 0.
gen lstart = ln(lbdfids_start)
gen ihs_start = asinh(lbdfids_start)
gen ihs_lbdfids = asinh(lbdfids)
gen ihs_nonstarts = asinh(nonstart)

order naics_n bds_vcnaics_dom year treated year_shock event_year post_shock hightech lstart ihs_start
sort bds_vcnaics_dom year

compress
cd $datadir/hightech_entre_placebo
save industry_analysis_file, replace

****************************************************************
****************************************************************


* Close the log opened by -log using- at the top of this do-file.
log close


